In [72]:
%%time 
!apt update
!apt upgrade
!apt install gdal-bin python-gdal python3-gdal 
# Install rtree - Geopandas requirment
!apt install python3-rtree 
# Install Geopandas
!pip install git+git://github.com/geopandas/geopandas.git
# Install descartes - Geopandas requirment
!pip install descartes 
'apt' is not recognized as an internal or external command,
operable program or batch file.
'apt' is not recognized as an internal or external command,
operable program or batch file.
'apt' is not recognized as an internal or external command,
operable program or batch file.
'apt' is not recognized as an internal or external command,
operable program or batch file.
Collecting git+git://github.com/geopandas/geopandas.git
  Cloning git://github.com/geopandas/geopandas.git to c:\users\trupti\appdata\local\temp\pip-req-build-gwjptzuk
Requirement already satisfied: pandas>=0.23.0 in d:\anaconda\lib\site-packages (from geopandas==0.7.0+62.gcfbf5d6) (0.25.1)
Collecting shapely (from geopandas==0.7.0+62.gcfbf5d6)
  Using cached https://files.pythonhosted.org/packages/ea/55/61a5d274a210585b5d0c3dac81a82952a4baa7903e3642228d7a465fc340/Shapely-1.7.0-cp37-cp37m-win_amd64.whl
Collecting fiona (from geopandas==0.7.0+62.gcfbf5d6)
  Using cached https://files.pythonhosted.org/packages/6d/42/f4a7cac53b28fa70e9a93d0e89a24d33e14826dad6644b699362ad84dde0/Fiona-1.8.13.post1.tar.gz
  Running command git clone -q git://github.com/geopandas/geopandas.git 'C:\Users\Trupti\AppData\Local\Temp\pip-req-build-gwjptzuk'
  WARNING: Retrying (Retry(total=4, connect=None, read=None, redirect=None, status=None)) after connection broken by 'NewConnectionError('<pip._vendor.urllib3.connection.VerifiedHTTPSConnection object at 0x000001D464C1F6C8>: Failed to establish a new connection: [Errno 11001] getaddrinfo failed')': /simple/shapely/
    ERROR: Command errored out with exit status 1:
     command: 'D:\Anaconda\python.exe' -c 'import sys, setuptools, tokenize; sys.argv[0] = '"'"'C:\\Users\\Trupti\\AppData\\Local\\Temp\\pip-install-sdihkozc\\fiona\\setup.py'"'"'; __file__='"'"'C:\\Users\\Trupti\\AppData\\Local\\Temp\\pip-install-sdihkozc\\fiona\\setup.py'"'"';f=getattr(tokenize, '"'"'open'"'"', open)(__file__);code=f.read().replace('"'"'\r\n'"'"', '"'"'\n'"'"');f.close();exec(compile(code, __file__, '"'"'exec'"'"'))' egg_info --egg-base pip-egg-info
         cwd: C:\Users\Trupti\AppData\Local\Temp\pip-install-sdihkozc\fiona\
    Complete output (1 lines):
    A GDAL API version must be specified. Provide a path to gdal-config using a GDAL_CONFIG environment variable or use a GDAL_VERSION environment variable.
    ----------------------------------------
ERROR: Command errored out with exit status 1: python setup.py egg_info Check the logs for full command output.
Requirement already satisfied: descartes in d:\anaconda\lib\site-packages (1.1.0)
Requirement already satisfied: matplotlib in d:\anaconda\lib\site-packages (from descartes) (3.1.1)
Requirement already satisfied: cycler>=0.10 in d:\anaconda\lib\site-packages (from matplotlib->descartes) (0.10.0)
Requirement already satisfied: kiwisolver>=1.0.1 in d:\anaconda\lib\site-packages (from matplotlib->descartes) (1.1.0)
Requirement already satisfied: pyparsing!=2.0.4,!=2.1.2,!=2.1.6,>=2.0.1 in d:\anaconda\lib\site-packages (from matplotlib->descartes) (2.4.2)
Requirement already satisfied: python-dateutil>=2.1 in d:\anaconda\lib\site-packages (from matplotlib->descartes) (2.8.0)
Requirement already satisfied: numpy>=1.11 in d:\anaconda\lib\site-packages (from matplotlib->descartes) (1.16.5)
Requirement already satisfied: six in d:\anaconda\lib\site-packages (from cycler>=0.10->matplotlib->descartes) (1.12.0)
Requirement already satisfied: setuptools in d:\anaconda\lib\site-packages (from kiwisolver>=1.0.1->matplotlib->descartes) (41.4.0)
Wall time: 1min 46s
In [1]:
from IPython.display import display
from IPython.display import HTML
import IPython.core.display as di # Example: di.display_html('<h3>%s:</h3>' % str, raw=True)

# This line will hide code by default when the notebook is exported as HTML
di.display_html('<script>jQuery(function() {if (jQuery("body.notebook_app").length == 0) { jQuery(".input_area").toggle(); jQuery(".prompt").toggle();}});</script>', raw=True)

# This line will add a button to toggle visibility of code blocks, for use with the HTML export version
di.display_html('''<button onclick="jQuery('.input_area').toggle(); jQuery('.prompt').toggle();">Toggle code</button>''', raw=True)
In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import plotly.express as px
import seaborn as sns
#import geopandas as gpd
import os
#import folium
import gc
#from folium.plugins import TimeSliderChoropleth
from scipy.signal import find_peaks
In [3]:
import warnings
warnings.filterwarnings("ignore")
In [4]:
data = pd.read_csv("https://covid19.isciii.es/resources/serie_historica_acumulados.csv", delimiter=",", encoding="latin1",skiprows=range(1730,1739))
In [5]:
print(data.head())
print(data.tail(10))
  CCAA      FECHA  CASOS  PCR+  TestAc+  Hospitalizados  UCI  Fallecidos
0   AN  20/2/2020    0.0     0      NaN             NaN  NaN         NaN
1   AR  20/2/2020    NaN     0      NaN             NaN  NaN         NaN
2   AS  20/2/2020    NaN     0      NaN             NaN  NaN         NaN
3   IB  20/2/2020    NaN     1      NaN             NaN  NaN         NaN
4   CN  20/2/2020    NaN     1      NaN             NaN  NaN         NaN
     CCAA      FECHA  CASOS   PCR+  TestAc+  Hospitalizados     UCI  \
1719   CE  20/5/2020    NaN    119     59.0            11.0     4.0   
1720   VC  20/5/2020    NaN  10987   3810.0          5747.0   730.0   
1721   EX  20/5/2020    NaN   3042   1001.0          1780.0   110.0   
1722   GA  20/5/2020    NaN   9077   2026.0          2943.0   334.0   
1723   MD  20/5/2020    NaN  67049   5293.0         42497.0  3617.0   
1724   ML  20/5/2020    NaN    121     13.0            44.0     3.0   
1725   MC  20/5/2020    NaN   1570   1039.0           680.0   112.0   
1726   NC  20/5/2020    NaN   5195   3157.0          2048.0   136.0   
1727   PV  20/5/2020    NaN  13421   5358.0          7032.0   578.0   
1728   RI  20/5/2020    NaN   4033   1395.0          1504.0    91.0   

      Fallecidos  
1719         4.0  
1720      1383.0  
1721       505.0  
1722       608.0  
1723      8931.0  
1724         2.0  
1725       149.0  
1726       506.0  
1727      1483.0  
1728       354.0  
In [6]:
#Get rid of the last rows, which content some description about how the data have been updated.
data=data[~data.CCAA.str.startswith('NOTA') & ~data.CCAA.str.startswith('Los') & ~data.CCAA.str.startswith('*') & ~data.CCAA.str.startswith('**') & ~data.CCAA.str.startswith('***') & ~data.CCAA.str.startswith('****')]
In [7]:
data.tail()
Out[7]:
CCAA FECHA CASOS PCR+ TestAc+ Hospitalizados UCI Fallecidos
1724 ML 20/5/2020 NaN 121 13.0 44.0 3.0 2.0
1725 MC 20/5/2020 NaN 1570 1039.0 680.0 112.0 149.0
1726 NC 20/5/2020 NaN 5195 3157.0 2048.0 136.0 506.0
1727 PV 20/5/2020 NaN 13421 5358.0 7032.0 578.0 1483.0
1728 RI 20/5/2020 NaN 4033 1395.0 1504.0 91.0 354.0
In [8]:
data.fillna(0)#missing values with 0
Out[8]:
CCAA FECHA CASOS PCR+ TestAc+ Hospitalizados UCI Fallecidos
0 AN 20/2/2020 0.0 0 0.0 0.0 0.0 0.0
1 AR 20/2/2020 0.0 0 0.0 0.0 0.0 0.0
2 AS 20/2/2020 0.0 0 0.0 0.0 0.0 0.0
3 IB 20/2/2020 0.0 1 0.0 0.0 0.0 0.0
4 CN 20/2/2020 0.0 1 0.0 0.0 0.0 0.0
... ... ... ... ... ... ... ... ...
1724 ML 20/5/2020 0.0 121 13.0 44.0 3.0 2.0
1725 MC 20/5/2020 0.0 1570 1039.0 680.0 112.0 149.0
1726 NC 20/5/2020 0.0 5195 3157.0 2048.0 136.0 506.0
1727 PV 20/5/2020 0.0 13421 5358.0 7032.0 578.0 1483.0
1728 RI 20/5/2020 0.0 4033 1395.0 1504.0 91.0 354.0

1729 rows × 8 columns

In [9]:
#changing name of columns
data.rename(columns={"FECHA":"Date",
              "PCR+":"Infected",
             "Hospitalizados":"Hospitalized",
             "Fallecidos":"Deaths",
             "Recuperados":"Cured",
             "UCI":"ICU",
             "CASOS":"Cases"},inplace= True)
In [10]:
data.replace({"AN":"Andalucía","AR":"Aragón","AS":"Asturias",
                "IB":"Baleares","CN":"Canarias","CB":"Cantabria",
                 "CM":"Castilla La Mancha","CL":"Castilla y León","CT":"Cataluña",
              "CE":"Ceuta","VC":"C. Valenciana","EX":"Extremadura","GA":"Galicia",
             "MD":"Madrid","ML":"Melilla","MC":"Murcia","NC":"Navarra",
             "PV":"País Vasco","RI":"La Rioja"},inplace=True)
In [11]:
#numerical summary of data
data.describe()
Out[11]:
Cases Infected TestAc+ Hospitalized ICU Deaths
count 1.0 1729.000000 693.000000 1401.000000 1431.000000 1414.000000
mean 0.0 6322.744361 1508.998557 4181.917916 399.382250 810.548798
std NaN 12611.187640 1824.850585 8214.092934 753.886484 1622.152452
min 0.0 0.000000 0.000000 0.000000 0.000000 0.000000
25% 0.0 71.000000 114.000000 260.000000 29.000000 17.000000
50% 0.0 1752.000000 873.000000 1080.000000 118.000000 197.000000
75% 0.0 5858.000000 2023.000000 4397.000000 420.500000 773.000000
max 0.0 67049.000000 8634.000000 42497.000000 3617.000000 8931.000000
In [12]:
data.isnull().sum()
Out[12]:
CCAA               0
Date               0
Cases           1728
Infected           0
TestAc+         1036
Hospitalized     328
ICU              298
Deaths           315
dtype: int64
In [13]:
#convert the date values and create a new column called Time.
data.Date = pd.to_datetime(data.Date, format="%d/%m/%Y")
In [14]:
data["NEW_DATE"] = data.Date.apply(lambda x: x.strftime("%d %b, %Y"))
In [15]:
print(data.Date.min())
print(data.Date.max())
2020-02-20 00:00:00
2020-05-20 00:00:00
In [16]:
total_s = data.groupby(["Date","NEW_DATE"])["Date","Cases","Infected","TestAc+","Deaths","Hospitalized","ICU"].sum().reset_index()
total_s.head()
Out[16]:
Date NEW_DATE Cases Infected TestAc+ Deaths Hospitalized ICU
0 2020-02-20 20 Feb, 2020 0.0 3 0.0 0.0 26.0 0.0
1 2020-02-21 21 Feb, 2020 0.0 3 0.0 0.0 30.0 1.0
2 2020-02-22 22 Feb, 2020 0.0 3 0.0 0.0 33.0 1.0
3 2020-02-23 23 Feb, 2020 0.0 3 0.0 0.0 38.0 2.0
4 2020-02-24 24 Feb, 2020 0.0 4 0.0 0.0 49.0 2.0
In [17]:
#create variables with infectef & dead daily people
aux = total_s.Infected.to_list()

daily=[]

for i in range(len(aux)-1):
    b = aux[i+1] - aux[i]
    daily.append(b)
    
daily.insert(0,0)   

total_s["Daily_Infected"] = daily
In [18]:
aux = total_s.Deaths.to_list()

daily=[]

for i in range(len(aux)-1):
    b = aux[i+1] - aux[i]
    daily.append(b)
    
daily.insert(0,0)   

total_s["Daily_Deaths"] = daily
In [19]:
aux = total_s.Cases.to_list()

daily=[]

for i in range(len(aux)-1):
    b = aux[i+1] - aux[i]
    daily.append(b)
    
daily.insert(0,0)   

total_s["Daily_Cases"] = daily
In [20]:
total_s.head()
Out[20]:
Date NEW_DATE Cases Infected TestAc+ Deaths Hospitalized ICU Daily_Infected Daily_Deaths Daily_Cases
0 2020-02-20 20 Feb, 2020 0.0 3 0.0 0.0 26.0 0.0 0 0.0 0.0
1 2020-02-21 21 Feb, 2020 0.0 3 0.0 0.0 30.0 1.0 0 0.0 0.0
2 2020-02-22 22 Feb, 2020 0.0 3 0.0 0.0 33.0 1.0 0 0.0 0.0
3 2020-02-23 23 Feb, 2020 0.0 3 0.0 0.0 38.0 2.0 0 0.0 0.0
4 2020-02-24 24 Feb, 2020 0.0 4 0.0 0.0 49.0 2.0 1 0.0 0.0

PLOTTING

In [22]:
aux = total_s.melt(id_vars="Date", value_vars=("Cases","Infected","TestAc+","Deaths","ICU","Hospitalized"), value_name="Count" , var_name= "Description")
In [23]:
aux.head()
Out[23]:
Date Description Count
0 2020-02-20 Cases 0.0
1 2020-02-21 Cases 0.0
2 2020-02-22 Cases 0.0
3 2020-02-23 Cases 0.0
4 2020-02-24 Cases 0.0
In [24]:
fig = px.line (aux, x= "Date", y = "Count", color="Description", title= "Actual situation in Spain")
fig.show()
In [25]:
fig = px.area (total_s, x= "Date", y = "Daily_Infected", range_x=[data.Date.min(),data.Date.max(),], title= "Daily infections in Spain ")
fig.show()
In [26]:
aux = total_s.melt(id_vars="Date", value_vars=("Daily_Deaths","Daily_Cases"), value_name="Count" , var_name= "Description")
In [27]:
aux_i = total_s.melt(id_vars="Date", value_vars=("Daily_Deaths","Daily_Infected"), value_name="Count" , var_name= "Description")
In [28]:
fig = px.area (aux_i, x= "Date", y = "Count", range_x= [data.Date.min(),data.Date.max()], range_y=[0,data.Infected.max()], color="Description", title= "Daily infections and deaths")
fig.show()
In [29]:
fig = px.bar (aux_i, x= "Date", y = "Count", color="Description",  range_x= [data.Date.min(),data.Date.max()], range_y=[0,data.Infected.max()], title= "Daily infections and deaths" )
fig.show()
In [30]:
fig = px.line (total_s, x= "Date", y = "Deaths", title= "Total deaths in Spain", color_discrete_sequence = ['red'])
fig.show()
In [31]:
fig = px.area(total_s, x= "Date", y = "Daily_Deaths", title= "Daily deaths in Spain", color_discrete_sequence = ['red'])
fig.show()
In [32]:
data_infected = data[data.Date>"20-02-2020"]
In [33]:
fig = px.bar(data_infected, x="CCAA", y="Infected", color="CCAA",
              animation_frame="Time", animation_group="CCAA", range_y=[0,data.Infected.max()+1000],title= "Infections by regions over time")
fig.show()
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-33-d7747431bcdc> in <module>
      1 fig = px.bar(data_infected, x="CCAA", y="Infected", color="CCAA",
----> 2               animation_frame="Time", animation_group="CCAA", range_y=[0,data.Infected.max()+1000],title= "Infections by regions over time")
      3 fig.show()

D:\Anaconda\lib\site-packages\plotly\express\_chart_types.py in bar(data_frame, x, y, color, facet_row, facet_col, facet_col_wrap, hover_name, hover_data, custom_data, text, error_x, error_x_minus, error_y, error_y_minus, animation_frame, animation_group, category_orders, labels, color_discrete_sequence, color_discrete_map, color_continuous_scale, range_color, color_continuous_midpoint, opacity, orientation, barmode, log_x, log_y, range_x, range_y, title, template, width, height)
    311         constructor=go.Bar,
    312         trace_patch=dict(orientation=orientation, textposition="auto"),
--> 313         layout_patch=dict(barmode=barmode),
    314     )
    315 

D:\Anaconda\lib\site-packages\plotly\express\_core.py in make_figure(args, constructor, trace_patch, layout_patch)
   1430 
   1431     args, trace_specs, grouped_mappings, sizeref, show_colorbar = infer_config(
-> 1432         args, constructor, trace_patch
   1433     )
   1434     grouper = [x.grouper or one_group for x in grouped_mappings] or [one_group]

D:\Anaconda\lib\site-packages\plotly\express\_core.py in infer_config(args, constructor, trace_patch)
   1273             all_attrables += [group_attr]
   1274 
-> 1275     args = build_dataframe(args, all_attrables, array_attrables)
   1276     if constructor in [go.Treemap, go.Sunburst] and args["path"] is not None:
   1277         args = process_dataframe_hierarchy(args)

D:\Anaconda\lib\site-packages\plotly\express\_core.py in build_dataframe(args, attrables, array_attrables)
   1035                             "\n To use the index, pass it in directly as `df.index`."
   1036                         )
-> 1037                     raise ValueError(err_msg)
   1038                 if length and len(df_input[argument]) != length:
   1039                     raise ValueError(

ValueError: Value of 'animation_frame' is not the name of a column in 'data_frame'. Expected one of ['CCAA', 'Date', 'Cases', 'Infected', 'TestAc+', 'Hospitalized', 'ICU', 'Deaths', 'NEW_DATE'] but received: Time

Analysis of Madrid

Madrid was the region with the highest number of infections and deaths. In the following graphics we will analyze the different variables. The cases related to infections were collected in the variable PCR+ (Infected), we can just use that variable just for Madrid.

In [34]:
total_madrid = data[data.CCAA=="Madrid"].groupby("Date")["Date","Infected","Deaths","Hospitalized","ICU"].sum().reset_index()
In [35]:
aux_m = total_madrid.melt(id_vars="Date", value_vars=("Infected","Deaths","ICU","Hospitalized"), value_name="Count" , var_name= "Status")
In [36]:
fig = px.line (aux_m, x= "Date", y = "Count", color="Status", title= "Actual situation in Madrid")
fig.show()
In [37]:
fig = px.bar(aux_m, x= "Date", y = "Count", color="Status", title= "Actual situation in Madrid")
fig.show()

Rest of regions

In [38]:
total_com = data.groupby(["Date","CCAA"])["Infected","Deaths","Hospitalized","ICU"].sum().reset_index()
total_com.head()
Out[38]:
Date CCAA Infected Deaths Hospitalized ICU
0 2020-02-20 Andalucía 0 0.0 0.0 0.0
1 2020-02-20 Aragón 0 0.0 0.0 0.0
2 2020-02-20 Asturias 0 0.0 0.0 0.0
3 2020-02-20 Baleares 1 0.0 0.0 0.0
4 2020-02-20 C. Valenciana 0 0.0 26.0 0.0
In [39]:
com = total_com[total_com.Date == max(total_com.Date)]
In [40]:
fig = px.bar (com.sort_values("Infected",ascending=False), x= "CCAA", 
              y = "Infected", color="CCAA", title= "Infections by region", text='CCAA', height=1000, orientation="v")
fig.show()
In [41]:
fig = px.bar (com.sort_values("Deaths",ascending=False), x= "CCAA", 
              y = "Deaths", color="CCAA", title= "Deaths by region", text='CCAA', height=1000, orientation="v")
fig.show()
In [42]:
fig = px.bar (com.sort_values("Hospitalized",ascending=False), x= "CCAA", 
              y = "Hospitalized", color="CCAA", title= "Hospitalizations by region", text='CCAA', height=1000, orientation="v")
fig.show()
In [43]:
fig = px.bar (com.sort_values("ICU",ascending=False), x= "CCAA", 
              y = "ICU", color="CCAA", title= "Advanced care by region", text='CCAA', height=1000, orientation="v")
fig.show()
In [44]:
fig = px.bar (com.sort_values("ICU",ascending=False), x= "CCAA", 
              y = "ICU", color="CCAA", title= "People who needed the intensive care", text='CCAA', height=1000, orientation="v")
fig.show()

Cummulative curves by region

In [45]:
aux_all = data.melt(id_vars=["Date", "CCAA"], value_vars=("Cases","Infected","Deaths","ICU","Hospitalized"), value_name="Count" , var_name= "Status")
In [46]:
aux_all_i = aux_all[aux_all["Status"]=="Infected"]
fig = px.line(aux_all_i,x="Date",y="Count",color="CCAA", range_x=["2020-02-20",data.Date.max()], title="Total infections by region")
fig.show()
In [47]:
aux_all_d = aux_all[aux_all["Status"]=="Deaths"]
fig = px.line(aux_all_d,x="Date",y="Count",color="CCAA", title="Total deaths by region")
fig.show()
In [49]:
# As the dataset was updated and the Ministry of Health Spain removed the Cured column, this graph cannot be plotted

aux_all_c = aux_all[aux_all["Status"]=="Deaths"]
fig = px.line(aux_all_c,x="Date",y="Count",color="CCAA", title="Total recoveries by region")
fig.show()

create individual daily infections and deaths for each region and plot the most critical ones to analyze the progress.

In [50]:
for i in data.CCAA.unique(): 
    
    a = i.replace(".","")
    a = a.replace(" ","_")
    
    exec('df_{}=data[data.CCAA == i]'.format(a))
    
    exec('aux_a = df_{}.Infected.to_list()'.format(a))
    
    
    daily=[]
    for i in range(len(aux_a)-1):
        b = aux_a[i+1] - aux_a[i]
        daily.append(b)
    
    daily.insert(0,0)   

    exec('df_{}["Daily_infected"] = daily'.format(a))
    
    exec('aux_d = df_{}.Deaths.to_list()'.format(a))
    
    
    daily=[]
    for i in range(len(aux_d)-1):
        b = aux_d[i+1] - aux_d[i]
        daily.append(b)
    
    daily.insert(0,0)   

    exec('df_{}["Daily_deaths"] = daily'.format(a))
In [51]:
df_daily_infected = pd.DataFrame({"Date":data.Date.unique(),
                                 "Madrid":df_Madrid["Daily_infected"].values,
                                 "Cataluña":df_Cataluña["Daily_infected"].values,
                                 "Andalucia":df_Andalucía["Daily_infected"].values,
                                 "Castilla La Mancha":df_Castilla_La_Mancha["Daily_infected"].values,
                                 "Castilla y Leon":df_Castilla_y_León["Daily_infected"].values,
                                 "País Vasco":df_País_Vasco["Daily_infected"].values})
In [52]:
aux_i = df_daily_infected.melt(id_vars="Date", value_vars=("Madrid","Cataluña","Andalucia","Castilla La Mancha","Castilla y Leon","País Vasco"), value_name="Count" , var_name= "CCAA")
In [53]:
aux_1=aux_i[aux_i.Date>"20-02-2020"]
In [54]:
fig = px.line (aux_1, x= "Date", y = "Count", color="CCAA", title= "Daily infections in Spain (Top 6)")
fig.show()
In [55]:
aux_2=aux_i[aux_i.Date>"18-04-2020"]
fig = px.bar (aux_2, x= "Date", y = "Count", color="CCAA", title= "Daily infections in Spain (Top 6)")
fig.show()
In [56]:
df_daily_fatalities = pd.DataFrame({"Date":data.Date.unique(),
                                 "Madrid":df_Madrid["Daily_deaths"].values,
                                 "Cataluña":df_Cataluña["Daily_deaths"].values,
                                 "Valencia":df_C_Valenciana["Daily_deaths"].values,
                                 "Castilla La Mancha":df_Castilla_La_Mancha["Daily_deaths"].values,
                                 "Castilla y Leon":df_Castilla_y_León["Daily_deaths"].values,
                                 "País Vasco":df_País_Vasco["Daily_deaths"].values})
In [57]:
aux_f = df_daily_fatalities.melt(id_vars="Date", value_vars=("Madrid","Cataluña","Valencia","Castilla La Mancha","Castilla y Leon","País Vasco"), value_name="Count" , var_name= "CCAA")
In [58]:
fig = px.line (aux_f, x= "Date", y = "Count", color="CCAA", title= "Daily Death in Spain (Top 6)")
fig.show()
In [59]:
fig = px.bar (aux_f, x= "Date", y = "Count", color="CCAA", title= "Daily Deaths in Spain (Top 6)")
fig.show()

Mapping with chloropleth

In [60]:
json = "shapefiles_ccaa_espana.geojson"
#geo = gpd.read_file(json)
In [61]:
data.CCAA.unique()
Out[61]:
array(['Andalucía', 'Aragón', 'Asturias', 'Baleares', 'Canarias',
       'Cantabria', 'Castilla La Mancha', 'Castilla y León', 'Cataluña',
       'Ceuta', 'C. Valenciana', 'Extremadura', 'Galicia', 'Madrid',
       'Melilla', 'Murcia', 'Navarra', 'País Vasco', 'La Rioja'],
      dtype=object)
In [62]:
geo.rename(columns={"name_0":"Country",
                        "name_1":"CCAA"},inplace= True)
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-62-e62fd78e209b> in <module>
----> 1 geo.rename(columns={"name_0":"Country",
      2                         "name_1":"CCAA"},inplace= True)

NameError: name 'geo' is not defined
In [ ]:
geo.drop(columns=["id_0","varname_1","nl_name_1","cc_1","type_1","engtype_1","validfr_1","validto_1","remarks_1",
                      "cartodb_id","created_at","updated_at"], inplace=True)
In [ ]:
geo.replace({"Castilla-La Mancha":"Castilla La Mancha","Islas Baleares":"Baleares","Islas Canarias":"Canarias",
                "Principado de Asturias":"Asturias","Región de Murcia":"Murcia","Ceuta y Melilla":"Ceuta",
                 "Comunidad de Madrid":"Madrid","Comunidad Foral de Navarra":"Navarra","Comunidad Valenciana":"C. Valenciana"},inplace=True)
In [ ]:
mapa=geo.merge(com,on="CCAA",how="left")
In [ ]:
mapa.head()
Out[ ]:
iso Country id_1 CCAA hasc_1 shape_leng shape_area geometry Date Infected Deaths Cured Hospitalized UCI
0 ESP Spain 935 Andalucía ES.AN 26.409069 8.916415 MULTIPOLYGON (((-6.21958 36.38110, -6.21958 36... 2020-05-17 12458.0 1358.0 10671.0 6178.0 761.0
1 ESP Spain 936 Aragón ES.AR 15.095307 5.150835 MULTIPOLYGON (((-0.74582 42.92190, -0.74409 42... 2020-05-17 5503.0 838.0 3772.0 2659.0 267.0
2 ESP Spain 937 Cantabria ES.CB 9.085137 0.588004 MULTIPOLYGON (((-3.28365 43.20003, -3.28500 43... 2020-05-17 2271.0 207.0 2287.0 1035.0 79.0
3 ESP Spain 938 Castilla y León ES.CL 28.330904 10.200456 MULTIPOLYGON (((-2.83047 42.79212, -2.82676 42... 2020-05-17 18512.0 1953.0 8716.0 8717.0 551.0
4 ESP Spain 939 Castilla La Mancha ES.CM 23.325765 8.330905 MULTIPOLYGON (((-3.36417 40.63507, -3.36655 40... 2020-05-17 16648.0 2898.0 6392.0 9066.0 637.0
In [ ]:
mapa["Time"] = mapa.Date.apply(lambda x: x.strftime("%d %b, %Y"))
In [ ]:
import json

with open("shapefiles_ccaa_espana.geojson") as f:
    geo = json.load(f)
In [ ]:
fig = px.choropleth_mapbox(mapa, geojson=geo, locations='id_1', color='Infected',
                           color_continuous_scale="Viridis",
                           range_color=(0, 12),
                           mapbox_style="carto-positron",
                           zoom=3, center = {"lat": 40.4167, "lon": -3.070325},
                           labels={'CCAA':'Infected'},
                           hover_name="CCAA",
                           title="Infections by regional governments"
                          )
fig.update_layout(margin={"r":0,"t":0,"l":0,"b":0})
fig.show()
In [ ]: